LayerNormFusion
对输入数组沿最后一维或指定维度执行 LayerNorm 归一化,并应用可学习的 gamma 和 beta 参数。
\[\text{dst}_i = \frac{\text{src}_i - \mu}{\sqrt{\sigma^2 + \epsilon}} \cdot \gamma_i + \beta_i
\quad \text{where} \quad
\mu = \frac{1}{N} \sum_{j=1}^{N} \text{src}_j,\quad
\sigma^2 = \frac{1}{N} \sum_{j=1}^{N} (\text{src}_j - \mu)^2\]
- 输入:
src_data - 输入数据地址。
gamma_data - 缩放参数 gamma。
beta_data - 偏置参数 beta。
param_inner_size - 参数内层尺寸。
param_outer_size - 参数外层尺寸。
norm_inner_size - 归一化内层尺寸。
norm_outer_size - 归一化外层尺寸。
epsilon - 避免除零的小常数。
task_id - 当前核心ID(仅适用于私有存储版本)。
thread_num - 核心总数(仅适用于私有存储版本,固定值1)。
core_mask - 核掩码(仅适用于共享存储版本)。
- 输出:
dst_data - 输出数据地址。
out_mean - 每个归一化单元的均值(可选)。
out_variance - 每个归一化单元的方差(可选)。
- 支持平台:
FT78NE、MT7004
备注
FT78NE 支持fp, int8
MT7004 支持hp, fp
共享存储版本:
-
void fp_layernormfusion_s(float *src_data, float *gamma_data, float *beta_data, float *dst_data, float *out_mean, float *out_variance, int param_inner_size, int param_outer_size, int norm_inner_size, int norm_outer_size, float epsilon, int length, int core_mask)
-
void hp_layernormfusion_s(half *src_data, half *gamma_data, half *beta_data, half *dst_data, half *out_mean, half *out_variance, int param_inner_size, int param_outer_size, int norm_inner_size, int norm_outer_size, float epsilon, int length, int core_mask)
-
void i8_layernormfusion_s(int8_t *src_data, int8_t *gamma_data, int8_t *beta_data, int8_t *dst_data, float *out_mean, float *out_variance, int param_inner_size, int param_outer_size, int norm_inner_size, int norm_outer_size, float epsilon, int length, int core_mask)
C调用示例:
#include <stdio.h>
#include <layernormfusion.h>

int main() {
    float *src = (float *)0xA0000000; // 输入在DDR空间
    float *gamma = (float *)0xA1000000;
    float *beta = (float *)0xA2000000;
    float *dst = (float *)0xC0000000;
    float *out_mean = (float *)0xD0000000;
    float *out_variance = (float *)0xD1000000;
    int param_inner_size = 16;
    int param_outer_size = 1;
    int norm_inner_size = 16;
    int norm_outer_size = 2;
    float epsilon = 1e-5;
    int core_mask = 0xff;
    int length = 1024;

    fp_layernormfusion_s(src, gamma, beta, dst, out_mean, out_variance, param_inner_size, param_outer_size, norm_inner_size, norm_outer_size, epsilon, length, core_mask);
    return 0;
}
私有存储版本:
-
void fp_layernormfusion_p(float *src_data, float *gamma_data, float *beta_data, float *dst_data, float *out_mean, float *out_variance, int param_inner_size, int param_outer_size, int norm_inner_size, int norm_outer_size, float epsilon)
-
void hp_layernormfusion_p(half *src_data, half *gamma_data, half *beta_data, half *dst_data, half *out_mean, half *out_variance, int param_inner_size, int param_outer_size, int norm_inner_size, int norm_outer_size, float epsilon)
-
void i8_layernormfusion_p(int8_t *src_data, int8_t *gamma_data, int8_t *beta_data, int8_t *dst_data, float *out_mean, float *out_variance, int param_inner_size, int param_outer_size, int norm_inner_size, int norm_outer_size, float epsilon)
C调用示例:
#include <stdio.h>
#include <layernormfusion.h>

int main() {
    float *src = (float *)0x10810000; // 输入在L2空间
    float *gamma = (float *)0x10811000;
    float *beta = (float *)0x10812000;
    float *dst = (float *)0x10820000;
    float *out_mean = (float *)0x10821000;
    float *out_variance = (float *)0x10822000;
    int param_inner_size = 16;
    int param_outer_size = 1;
    int norm_inner_size = 16;
    int norm_outer_size = 2;
    float epsilon = 1e-5;

    fp_layernormfusion_p(src, gamma, beta, dst, out_mean, out_variance, param_inner_size, param_outer_size, norm_inner_size, norm_outer_size, epsilon);
    return 0;
}